winsafe\kernel\utilities/
w_string.rs

1use std::cmp::Ordering;
2
3use crate::co;
4use crate::decl::*;
5use crate::guard::*;
6use crate::kernel::ffi;
7
/// Stores a `[u16]` buffer for a null-terminated
/// [Unicode UTF-16](https://learn.microsoft.com/en-us/windows/win32/intl/unicode-in-the-windows-api)
/// wide string natively used by Windows.
///
/// Uses the
/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html)
/// technique for faster performance.
///
/// This struct is mostly used internally by the library, as a bridge between
/// Windows and Rust strings.
#[derive(Default, Clone)]
pub struct WString {
	/// Backing storage: inline array, heap block, or nothing at all.
	buf: Buffer,
}
22
23impl std::fmt::Display for WString {
24	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25		let txt = match self.buf.to_string_checked() {
26			Ok(t) => t,
27			Err(e) => format!("PARSING ERROR: {}", e.to_string()),
28		};
29		std::fmt::Display::fmt(&txt, f)
30	}
31}
32impl std::fmt::Debug for WString {
33	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34		std::fmt::Debug::fmt(&self.buf, f)
35	}
36}
37
38impl std::cmp::PartialEq for WString {
39	fn eq(&self, other: &Self) -> bool {
40		self.cmp(other) == Ordering::Equal
41	}
42}
// Marker impl: equality of `WString` comes from its `PartialEq` impl, which
// is based on `Ord::cmp`.
impl std::cmp::Eq for WString {}
44
45impl std::cmp::PartialOrd for WString {
46	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
47		let ord = unsafe { ffi::lstrcmpW(self.as_ptr(), other.as_ptr()) };
48		Some(if ord < 0 {
49			Ordering::Less
50		} else if ord > 0 {
51			Ordering::Greater
52		} else {
53			Ordering::Equal
54		})
55	}
56}
57impl std::cmp::Ord for WString {
58	fn cmp(&self, other: &Self) -> Ordering {
59		self.partial_cmp(other).unwrap()
60	}
61}
62
63impl WString {
	/// Size, in `u16` wide chars, of the inline storage used by the internal
	/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html).
	pub const SSO_LEN: usize = Buffer::SSO_LEN;
67
68	/// Stores an UTF-16 null-terminated string from an optional [`&str`](str).
69	///
70	/// If `s` is `None` or the string is empty, no allocation is made.
71	#[must_use]
72	pub fn from_opt_str(s: Option<impl AsRef<str>>) -> Self {
73		Self { buf: Buffer::from_opt_str(s) }
74	}
75
76	/// Stores an UTF-16 null-terminated string from a [`&str`](str).
77	///
78	/// If the string is empty, no allocation is made.
79	#[must_use]
80	pub fn from_str(s: impl AsRef<str>) -> Self {
81		Self { buf: Buffer::from_str(s, ForceHeap::No) }
82	}
83
84	/// Stores an UTF-16 null-terminated string from a [`&str`](str), bypassing
85	/// [Short String Optimization](https://joellaity.com/2020/01/31/string.html)
86	/// – that is, forcing the internal allocation on the heap. This should be
87	/// rarely needed.
88	///
89	/// If the string is empty, no allocation is made.
90	#[must_use]
91	pub fn from_str_force_heap(s: impl AsRef<str>) -> Self {
92		Self { buf: Buffer::from_str(s, ForceHeap::Yes) }
93	}
94
95	/// Stores a series of UTF-16 null-terminated strings. The buffer will end
96	/// with two terminating nulls – that means further retrieval operations
97	/// will "see" only the first string.
98	///
99	/// If the slice is empty, no allocation is made.
100	///
101	/// This method can be used as an escape hatch to interoperate with other
102	/// libraries.
103	#[must_use]
104	pub fn from_str_vec(v: &[impl AsRef<str>]) -> Self {
105		Self { buf: Buffer::from_str_vec(v) }
106	}
107
108	/// Stores an UTF-16 null-terminated string by copying from a buffer,
109	/// specifying the number of chars to be copied.
110	///
111	/// The `src` buffer doesn't need to be null-terminated.
112	#[must_use]
113	pub fn from_wchars_count(src: *const u16, num_chars: usize) -> Self {
114		Self {
115			buf: Buffer::from_wchars_count(src, num_chars),
116		}
117	}
118
119	/// Stores an UTF-16 null-terminated string by copying from a
120	/// null-terminated buffer. The string length is retrieved with
121	/// [`lstrlen`](https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-lstrlenw).
122	///
123	/// # Safety
124	///
125	/// Be sure the string is null-terminated, otherwise `lstrlen` will get
126	/// lost, possibly reading an invalid memory location.
127	#[must_use]
128	pub unsafe fn from_wchars_nullt(src: *const u16) -> Self {
129		Self {
130			buf: unsafe { Buffer::from_wchars_nullt(src) },
131		}
132	}
133
134	/// Stores an UTF-16 null-terminated string by copying from a slice.
135	///
136	/// The `src` slice doesn't need to be null-terminated.
137	#[must_use]
138	pub fn from_wchars_slice(src: &[u16]) -> Self {
139		Self { buf: Buffer::from_wchars_slice(src) }
140	}
141
142	/// Constructs a new, empty `WString`. No allocation is made.
143	#[must_use]
144	pub const fn new() -> Self {
145		Self { buf: Buffer::new() }
146	}
147
148	/// Allocates an UTF-16 buffer with an specific length. All elements will be
149	/// set to zero.
150	#[must_use]
151	pub fn new_alloc_buf(sz: usize) -> Self {
152		Self {
153			buf: Buffer::new_alloc_buf(sz, ForceHeap::No),
154		}
155	}
156
157	/// Returns a mutable
158	/// [`LPWSTR`](https://learn.microsoft.com/en-us/windows/win32/learnwin32/working-with-strings)
159	/// pointer to the internal UTF-16 string buffer, to be passed to native
160	/// Win32 functions. This is useful to receive strings.
161	///
162	/// # Panics
163	///
164	/// Panics if the buffer was not allocated.
165	///
166	/// # Safety
167	///
168	/// Be sure to alloc enough room, otherwise a buffer overrun may occur.
169	#[must_use]
170	pub unsafe fn as_mut_ptr(&mut self) -> *mut u16 {
171		unsafe { self.buf.as_mut_ptr() }
172	}
173
174	/// Returns a mutable slice to the internal UTF-16 string buffer.
175	#[must_use]
176	pub fn as_mut_slice(&mut self) -> &mut [u16] {
177		self.buf.as_mut_slice()
178	}
179
180	/// Returns a
181	/// [`LPCWSTR`](https://learn.microsoft.com/en-us/windows/win32/learnwin32/working-with-strings)
182	/// pointer to the internal UTF-16 string buffer, to be passed to native
183	/// Win32 functions.
184	///
185	/// If the buffer was not allocated, returns a null pointer.
186	#[must_use]
187	pub fn as_ptr(&self) -> *const u16 {
188		self.buf.as_ptr()
189	}
190
191	/// Returns a slice to the internal UTF-16 string buffer.
192	#[must_use]
193	pub fn as_slice(&self) -> &[u16] {
194		self.buf.as_slice()
195	}
196
197	/// Returns the size of the allocated internal buffer, in `u16` wide chars.
198	/// Note that the terminating null, if existing, is also counted.
199	///
200	/// If the buffer was not allocated yet, returns zero.
201	#[must_use]
202	pub const fn buf_len(&self) -> usize {
203		self.buf.buf_len()
204	}
205
206	/// Copies the content into an external buffer. A terminating null will be
207	/// appended.
208	///
209	/// If `dest` is smaller, the string will be truncated.
210	///
211	/// If `dest` has 1 element, it will receive only the terminating null.
212	pub fn copy_to_slice(&self, dest: &mut [u16]) {
213		if !dest.is_empty() {
214			let usable_len = dest.len() - 1; // leave room for terminating null
215			self.as_slice()
216				.iter()
217				.zip(dest[..usable_len].iter_mut())
218				.for_each(|(src, dest)| *dest = *src);
219			dest[usable_len..]
220				.iter_mut()
221				.for_each(|dest| *dest = 0x0000); // fill the rest with zero
222		}
223	}
224
225	/// Fills the entire buffer with zeros.
226	pub fn fill_with_zero(&mut self) {
227		self.as_mut_slice().iter_mut().for_each(|ch| *ch = 0x0000);
228	}
229
230	/// Returns `true` if the internal buffer has been allocated.
231	#[must_use]
232	pub const fn is_allocated(&self) -> bool {
233		self.buf.is_allocated()
234	}
235
236	/// Converts into [`String`](std::string::String) by calling
237	/// [`String::from_utf16`](std::string::String::from_utf16). An uncallocated
238	/// will simply be converted into an empty string.
239	///
240	/// This method is useful if you're parsing raw data which may contain
241	/// invalid characters. If you're dealing with a string known to be valid,
242	/// [`to_string`](std::string::ToString::to_string) is more practical.
243	#[must_use]
244	pub fn to_string_checked(&self) -> Result<String, std::string::FromUtf16Error> {
245		self.buf.to_string_checked()
246	}
247
248	/// Wrapper to
249	/// [`lstrlen`](https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-lstrlenw).
250	///
251	/// Returns the number of [`u16`] characters stored in the internal buffer,
252	/// not counting the terminating null.
253	#[must_use]
254	pub fn str_len(&self) -> usize {
255		unsafe { ffi::lstrlenW(self.buf.as_ptr()) as _ }
256	}
257
258	/// Converts the string to lower case, in-place. Wrapper to
259	/// [`CharLower`](https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-charlowerw).
260	pub fn make_lowercase(&mut self) {
261		unsafe {
262			ffi::CharLowerW(self.as_mut_ptr());
263		}
264	}
265
266	/// Converts the string to upper case, in-place. Wrapper to
267	/// [`CharUpper`](https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-charupperw).
268	pub fn make_uppercase(&mut self) {
269		unsafe {
270			ffi::CharUpperW(self.as_mut_ptr());
271		}
272	}
273
274	/// Guesses the encoding with [`Encoding::guess`](crate::Encoding::guess)
275	/// and parses the data as a string.
276	///
277	/// If you're sure the data has UTF-8 encoding, you can also use the
278	/// built-in [`String::from_utf8`](std::string::String::from_utf8).
279	///
280	/// To serialize the string back into UTF-8 bytes, use the built-in
281	/// [`String::into_bytes`](std::string::String::into_bytes).
282	///
283	/// # Examples
284	///
285	/// Usually the fastest way to read the text from a file is by mapping its
286	/// contents in memory with [`FileMapped`](crate::FileMapped), then parsing:
287	///
288	/// ```no_run
289	/// use winsafe::{self as w, prelude::*};
290	///
291	/// let file_in = w::FileMapped::open(
292	///     "C:\\Temp\\foo.txt",
293	///     w::FileAccess::ExistingReadOnly,
294	/// )?;
295	/// let wstr = w::WString::parse(file_in.as_slice())?;
296	/// let str_contents = wstr.to_string();
297	/// # w::SysResult::Ok(())
298	/// ```
299	#[must_use]
300	pub fn parse(data: &[u8]) -> SysResult<Self> {
301		let mut data = data;
302		if data.is_empty() {
303			return Ok(Self::new()); // nothing to parse
304		}
305
306		let (encoding, sz_bom) = Encoding::guess(data);
307		data = &data[sz_bom..]; // skip BOM, if any
308
309		Ok(Self::from_wchars_slice(&match encoding {
310			Encoding::Ansi => Self::parse_ansi(data),
311			Encoding::Win1252 => MultiByteToWideChar(co::CP::WINDOWS_1252, co::MBC::NoValue, data)?,
312			Encoding::Utf8 => MultiByteToWideChar(co::CP::UTF8, co::MBC::NoValue, data)?,
313			Encoding::Utf16be => Self::parse_utf16(data, true),
314			Encoding::Utf16le => Self::parse_utf16(data, false),
315			Encoding::Utf32be
316			| Encoding::Utf32le
317			| Encoding::Scsu
318			| Encoding::Bocu1
319			| Encoding::Unknown => panic!("Encoding {} not implemented.", encoding),
320		}))
321	}
322
323	fn parse_ansi(data: &[u8]) -> Vec<u16> {
324		data.iter()
325			.take_while(|ch| **ch != 0x0000) // ignore terminating null, if any
326			.map(|ch| *ch as u16) // raw u8 to u16 conversion
327			.collect()
328	}
329
330	fn parse_utf16(data: &[u8], is_big_endian: bool) -> Vec<u16> {
331		let data = if data.len() % 2 == 1 {
332			&data[..data.len() - 1] // if odd number of bytes, discard last one
333		} else {
334			data
335		};
336
337		data.chunks(2)
338			.take_while(|ch2| **ch2 != [0x00, 0x00]) // ignore terminating null, if any
339			.map(|ch2| {
340				if is_big_endian {
341					u16::from_be_bytes(ch2.try_into().unwrap())
342				} else {
343					u16::from_le_bytes(ch2.try_into().unwrap())
344				}
345			})
346			.collect()
347	}
348}
349
/// Tells the buffer allocation whether the inline (stack) storage must be
/// bypassed.
#[derive(PartialEq, Eq)]
enum ForceHeap {
	/// Always allocate on the heap, no matter the requested size.
	Yes,
	/// Use the inline storage whenever the requested size fits in it.
	No,
}
355
/// Internal storage of a `WString`.
enum Buffer {
	/// Inline array of `SSO_LEN` wide chars, used for short strings.
	Stack([u16; Self::SSO_LEN]),
	/// Memory block obtained with `HGLOBAL::GlobalAlloc`, held through its
	/// guard, along with the size of the block.
	Heap(usize, GlobalFreeGuard), // keep memory size in bytes
	/// No storage at all; converted to an empty string.
	Unallocated,
}
361
362impl Default for Buffer {
363	fn default() -> Self {
364		Self::Unallocated
365	}
366}
367
368impl Clone for Buffer {
369	fn clone(&self) -> Self {
370		match self {
371			Self::Unallocated => Self::Unallocated,
372			_ => {
373				let mut new_self = Self::new_alloc_buf(self.buf_len(), ForceHeap::No);
374				self.as_slice()
375					.iter()
376					.zip(new_self.as_mut_slice())
377					.for_each(|(src, dest)| *dest = *src);
378				new_self
379			},
380		}
381	}
382}
383
384impl std::fmt::Debug for Buffer {
385	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
386		let txt = match self.to_string_checked() {
387			Ok(t) => t,
388			Err(e) => format!("PARSING ERROR: {}", e.to_string()),
389		};
390		write!(
391			f,
392			"{}",
393			match self {
394				Self::Stack(_) => format!("STACK({}) \"{}\"", self.buf_len(), txt),
395				Self::Heap(_, _) => format!("HEAP({}) \"{}\"", self.buf_len(), txt),
396				Self::Unallocated => "UNALLOCATED \"\"".to_owned(),
397			}
398		)
399	}
400}
401
402impl Buffer {
	/// Number of `u16` wide chars in the inline storage; a requested size
	/// above this goes to the heap.
	pub const SSO_LEN: usize = 20;
404
405	#[must_use]
406	fn from_opt_str(s: Option<impl AsRef<str>>) -> Self {
407		match s {
408			Some(s) => Self::from_str(s, ForceHeap::No),
409			None => Self::Unallocated,
410		}
411	}
412
413	#[must_use]
414	fn from_str(s: impl AsRef<str>, force_heap: ForceHeap) -> Self {
415		let s_len = s.as_ref().encode_utf16().count();
416		if s_len == 0 {
417			Self::Unallocated
418		} else {
419			let num_chars = s_len + 1; // room for terminating null
420			let mut new_self = Self::new_alloc_buf(num_chars, force_heap);
421			s.as_ref()
422				.encode_utf16()
423				.zip(new_self.as_mut_slice())
424				.for_each(|(src, dest)| *dest = src);
425			new_self
426		}
427	}
428
429	#[must_use]
430	fn from_str_vec(v: &[impl AsRef<str>]) -> Self {
431		if v.is_empty() {
432			return Self::Unallocated; // no elements yield an empty buffer
433		}
434
435		let tot_chars = v.iter() // number of chars of all strings, including terminating nulls
436			.fold(0, |tot, s| tot + s.as_ref().chars().count() + 1) // include terminating null
437			+ 1; // double terminating null
438		let mut new_self = Self::new_alloc_buf(tot_chars, ForceHeap::No);
439		v.iter()
440			.map(|s| {
441				s.as_ref().encode_utf16().chain(std::iter::once(0x0000)) // append terminating null on each string
442			})
443			.flatten()
444			.zip(new_self.as_mut_slice())
445			.for_each(|(src, dest)| *dest = src);
446		new_self
447	}
448
449	#[must_use]
450	fn from_wchars_count(src: *const u16, num_chars: usize) -> Self {
451		if src.is_null() || num_chars == 0 {
452			Self::Unallocated
453		} else {
454			Self::from_wchars_slice(unsafe { std::slice::from_raw_parts(src, num_chars) })
455		}
456	}
457
458	#[must_use]
459	unsafe fn from_wchars_nullt(src: *const u16) -> Self {
460		Self::from_wchars_count(src, unsafe { ffi::lstrlenW(src) as _ })
461	}
462
463	#[must_use]
464	fn from_wchars_slice(src: &[u16]) -> Self {
465		if src.is_empty() {
466			Self::Unallocated
467		} else {
468			let num_chars = src
469				.iter()
470				.take_while(|ch| **ch != 0x0000) // skip terminating null, if any
471				.count() + 1; // room for terminating null
472			let mut new_self = Self::new_alloc_buf(num_chars, ForceHeap::No);
473			src.iter()
474				.take_while(|ch| **ch != 0x0000) // skip terminating null, if any
475				.zip(new_self.as_mut_slice())
476				.for_each(|(src, dest)| *dest = *src);
477			new_self
478		}
479	}
480
481	#[must_use]
482	const fn new() -> Self {
483		Self::Unallocated
484	}
485
486	#[must_use]
487	fn new_alloc_buf(num_chars: usize, force_heap: ForceHeap) -> Self {
488		if num_chars == 0 {
489			Self::Unallocated
490		} else if force_heap == ForceHeap::Yes || num_chars > Self::SSO_LEN {
491			Self::Heap(
492				num_chars * std::mem::size_of::<u16>(),
493				HGLOBAL::GlobalAlloc(
494					co::GMEM::FIXED | co::GMEM::ZEROINIT,
495					num_chars * std::mem::size_of::<u16>(),
496				)
497				.unwrap(), // assume no allocation errors
498			)
499		} else {
500			Self::Stack([0x0000; Self::SSO_LEN])
501		}
502	}
503
504	#[must_use]
505	unsafe fn as_mut_ptr(&mut self) -> *mut u16 {
506		match self {
507			Self::Stack(arr) => arr.as_mut_ptr(),
508			Self::Heap(_, ptr) => ptr.ptr() as _,
509			Self::Unallocated => panic!("Trying to use an unallocated WString buffer."),
510		}
511	}
512
513	#[must_use]
514	fn as_mut_slice(&mut self) -> &mut [u16] {
515		match self {
516			Self::Stack(arr) => arr,
517			Self::Heap(_, ptr) => unsafe {
518				std::slice::from_raw_parts_mut(ptr.ptr() as _, self.buf_len())
519			},
520			Self::Unallocated => &mut [],
521		}
522	}
523
524	#[must_use]
525	fn as_ptr(&self) -> *const u16 {
526		match self {
527			Self::Stack(arr) => arr.as_ptr(),
528			Self::Heap(_, ptr) => ptr.ptr() as _,
529			Self::Unallocated => std::ptr::null(),
530		}
531	}
532
533	#[must_use]
534	fn as_slice(&self) -> &[u16] {
535		match self {
536			Self::Stack(arr) => arr,
537			Self::Heap(_, ptr) => unsafe {
538				std::slice::from_raw_parts(ptr.ptr() as _, self.buf_len())
539			},
540			Self::Unallocated => &[],
541		}
542	}
543
544	#[must_use]
545	const fn buf_len(&self) -> usize {
546		match self {
547			Self::Stack(arr) => arr.len(),
548			Self::Heap(sz_bytes, _) => *sz_bytes / std::mem::size_of::<u16>(),
549			Self::Unallocated => 0,
550		}
551	}
552
553	#[must_use]
554	const fn is_allocated(&self) -> bool {
555		match self {
556			Self::Unallocated => false,
557			_ => true,
558		}
559	}
560
561	#[must_use]
562	fn to_string_checked(&self) -> Result<String, std::string::FromUtf16Error> {
563		match self {
564			Self::Unallocated => Ok(String::new()),
565			_ => String::from_utf16(
566				&self
567					.as_slice()
568					.into_iter()
569					.take_while(|ch| **ch != 0x0000) // remove all trailing zeros
570					.map(|ch| *ch)
571					.collect::<Vec<_>>(),
572			),
573		}
574	}
575}